cat(sprintf("\nScript 2: Wrangle Cint data (%s)\n", f_time_gmt()))

# Load data --------------------------------------------------------------------

# Read the raw Qualtrics export (wide format)
cint_w_raw <- fread(here("data", "cint", "cint_qualtrics_results.csv"))

# Keep only responses that have a 36-character rid, answered "Yes" to the
# consent item, and answered "Yes" to the `thoughtful` item.
# Subsetting rows with `i` already yields a new data.table, so the by-reference
# updates applied to `cint_w` further down do not touch `cint_w_raw`.
cint_w <- cint_w_raw[
  consent_agree == "Yes" & thoughtful == "Yes" & str_length(rid) == 36
]

# De-duplicate on the survey-company ID (rid): when an rid occurs more than
# once, keep only the row that comes last in EndDate sort order.
# Helper columns (left on the table): n_rid   = number of rows for the rid,
#                                     seq_rid = 1..n_rid within rid by EndDate.
cint_w <- cint_w[, n_rid := .N, by = rid][order(EndDate)]
cint_w[, seq_rid := seq_len(.N), by = rid]

# Stack the single-response rids first, then the final row of every repeated
# rid, and finish by restoring EndDate order on the combined table.
cint_w <- rbind(
  cint_w[n_rid == 1],
  cint_w[n_rid > 1 & seq_rid == n_rid]
)[order(EndDate)]

# Handle background variables --------------------------------------------------

# Clean education variable

## integer code, with negative codes (e.g. -3105) recoded to NA
cint_w[, education_num := {
  code <- as.integer(education)
  fifelse(code < 0L, NA_integer_, code)
}]

## replace the raw column with a labelled factor built from the integer code
cint_w[, education := fct_recode(
  as.character(education_num),
  "Some high school or less"                   = "1",
  "High school graduate"                       = "2",
  "Other post high school vocational training" = "3",
  "Completed some college, but no degree"      = "4",
  "Associate's degree"                         = "5",
  "Bachelor's degree"                          = "6",
  "Master's or professional degree"            = "7",
  "Doctorate degree"                           = "8"
)]

## four-category version: codes are collapsed pairwise (1-2, 3-4, 5-6, 7-8);
## any code outside 1-8 (including NA) is left as NA
cint_w[, ed4 := factor(
  fcase(
    education_num %in% c(1L, 2L), "HS or less",
    education_num %in% c(3L, 4L), "Some college",
    education_num %in% c(5L, 6L), "College",
    education_num %in% c(7L, 8L), "Postgraduate"
  ),
  levels = c("HS or less", "Some college", "College", "Postgraduate")
)]

# Create numeracy (CRT) dummies and sum
# An answer is scored 1 when it starts with the keyed digits ("5", "5", "47")
# and contains no further digit afterwards (trailing non-digit text is
# allowed), 0 when it does not match, and NA when the answer itself is NA.
cint_w[, crt1_dummy := as.integer(str_detect(crt1, "^5[^\\d]*$"))]
cint_w[, crt2_dummy := as.integer(str_detect(crt2, "^5[^\\d]*$"))]
cint_w[, crt3_dummy := as.integer(str_detect(crt3, "^47[^\\d]*$"))]

# numeracy = number of correct CRT items; NA if any of the three dummies is NA
cint_w[, numeracy := crt1_dummy + crt2_dummy + crt3_dummy]

# Create political knowledge dummies and sum
# Each dummy is 1 when the respondent chose the keyed answer for that item and
# 0 otherwise (NA when the item is NA). The Senate dummy is scored from the
# Senate item itself, so each of the four items contributes once to the sum.
# NOTE(review): assumes the Senate item is stored in a column named
# `know_senate`, following the naming of the other three items -- confirm
# against the Qualtrics export.
cint_w[
  , ":="(know_house_dummy = as.integer(know_house == "Republicans"),
         know_senate_dummy = as.integer(know_senate == "Republicans"),
         know_vance_dummy = as.integer(know_vance == "Vice President"),
         know_roberts_dummy = as.integer(know_roberts == "Chief Justice of the US Supreme Court"))
][
  # pol_awareness = number of correct items; NA if any dummy is NA
  , pol_awareness := know_house_dummy + know_senate_dummy + know_vance_dummy +
    know_roberts_dummy
]

# Create mock vignette dummies, sum, and categorical version
# Each dummy is 1 when the respondent picked the keyed option for that mock
# question, 0 otherwise (NA when the answer is NA).
cint_w[, mock1_dummy := as.integer(mock1 == "Event licensing")]
cint_w[, mock2_dummy := as.integer(mock2 == "A single license will cover all events occurring in a stadium")]
cint_w[, mock3_dummy := as.integer(mock3 == "There may be a special hearing held by lawmakers")]

# Number of mock questions answered with the keyed option
cint_w[, mock_count := mock1_dummy + mock2_dummy + mock3_dummy]

# Categorical version: None (0) / Some (1-2) / All (3), with "None" and "Some"
# moved to the front of the level order
cint_w[, f_mock_count := fct_relevel(
  fcase(
    mock_count == 0, "None",
    mock_count %in% 1:2, "Some",
    mock_count == 3, "All"
  ),
  "None", "Some"
)]

# Pivot to long ----------------------------------------------------------------

# Extract subset of the data with only acquiescence questions

# Treatment-indicator columns: names that match "__js.*_d$", leaving out any
# name that contains "ballot"
vars_treatment <- str_subset(
  str_subset(names(cint_w), "__js.*_d$"),
  "ballot",
  negate = TRUE
)

# Response columns: the treatment names with the first "__js_" and the
# trailing "_d" stripped (the "ballot" exclusion is applied again, as before)
vars_responses <- str_subset(
  str_remove(str_remove(vars_treatment, "__js_"), "_d$"),
  "ballot",
  negate = TRUE
)

# Column order matters for the pivot below: rid, all treatment columns, then
# the response columns in the same question order
vars <- c("rid", vars_treatment, vars_responses)
cint_ss <- cint_w[, vars, with = FALSE]

# Rename so treatment columns end in "_d" (without "__js_") and response
# columns end in "_r"
setnames(cint_ss, old = vars_treatment, new = str_remove(vars_treatment, "__js_"))
setnames(cint_ss, old = vars_responses, new = str_c(vars_responses, "_r"))

# Pivot wide-to-long: make unit of observation the respondent-question

cint_long <- merge(
  # pivot: the "_d" columns stack into `treatment` and the "_r" columns into
  # `response`; melt adds a factor `variable` indexing the question (1, 2, ...)
  melt(
    cint_ss,
    id.vars = "rid",
    measure.vars = patterns(treatment = "_d$", response = "_r$")
  ),
  # lookup table mapping that index back to the question name `q`
  data.table(
    q = vars_responses,
    variable = factor(seq_along(vars_responses))
  ),
  by = "variable",
  all.x = TRUE,
  sort = FALSE
)

# Put the identifying columns first
setcolorder(cint_long, c("rid", "q", "treatment", "response"))

# Drop the melt index (superseded by `q`) and store treatment as integer
cint_long[, variable := NULL][, treatment := as.integer(treatment)]

# Outcome `newdv`:
#   1   when treatment is 1 with "Yes, true", or treatment is 0 with "No false"
#   0   for the two opposite pairings
#   0.5 for "Not sure" (whatever the treatment value)
#   NA  for anything else
# NOTE(review): "No false" has no comma while "Yes, true" does -- confirm this
# matches the exact response label in the survey export.
cint_long[, newdv := fcase(
  (treatment == 1 & response %in% "Yes, true") |
    (treatment == 0 & response %in% "No false"), 1,
  (treatment == 1 & response %in% "No false") |
    (treatment == 0 & response %in% "Yes, true"), 0,
  response == "Not sure", 0.5,
  default = NA_real_
)]

# The raw response text is no longer needed; rename the respondent id
cint_long[, response := NULL]
setnames(cint_long, old = "rid", new = "r_id")

# Add covariates to long dataset
# Left join on r_id: every respondent-question row of cint_long is kept, in
# its existing order, and gains the respondent-level columns from cint_w.

cint <- merge(
  x = cint_long,
  y = cint_w[, .(
    r_id = rid,
    sees_not_sure = `__js_sees_not_sure`,
    education, education_num, ed4,
    numeracy, crt1_dummy, crt2_dummy, crt3_dummy,
    pol_awareness,
    know_house_dummy, know_senate_dummy, know_vance_dummy, know_roberts_dummy,
    mock1_dummy, mock2_dummy, mock3_dummy, mock_count, f_mock_count
  )],
  by = "r_id",
  all.x = TRUE,
  sort = FALSE
)

# Write the long respondent-question dataset to proc/cint.rds, logging a
# timestamped progress line first

cat(sprintf("--Save proc/cint.rds (%s)\n", f_time_gmt()))
saveRDS(object = cint, file = here("proc", "cint.rds"))
